{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3</td>\n",
       "      <td>40.7145</td>\n",
       "      <td>-73.9425</td>\n",
       "      <td>3000</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>750.000000</td>\n",
       "      <td>-1.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7947</td>\n",
       "      <td>-73.9667</td>\n",
       "      <td>5465</td>\n",
       "      <td>2732.5</td>\n",
       "      <td>1821.666667</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7388</td>\n",
       "      <td>-74.0018</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.0</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7539</td>\n",
       "      <td>-73.9677</td>\n",
       "      <td>3275</td>\n",
       "      <td>1637.5</td>\n",
       "      <td>1637.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>40.8241</td>\n",
       "      <td>-73.9493</td>\n",
       "      <td>3350</td>\n",
       "      <td>1675.0</td>\n",
       "      <td>670.000000</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 225 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  latitude  longitude  price  price_bathrooms  \\\n",
       "0        1.5         3   40.7145   -73.9425   3000           1200.0   \n",
       "1        1.0         2   40.7947   -73.9667   5465           2732.5   \n",
       "2        1.0         1   40.7388   -74.0018   2850           1425.0   \n",
       "3        1.0         1   40.7539   -73.9677   3275           1637.5   \n",
       "4        1.0         4   40.8241   -73.9493   3350           1675.0   \n",
       "\n",
       "   price_bedrooms  room_diff  room_num  Year       ...        walk  walls  \\\n",
       "0      750.000000       -1.5       4.5  2016       ...           0      0   \n",
       "1     1821.666667       -1.0       3.0  2016       ...           0      0   \n",
       "2     1425.000000        0.0       2.0  2016       ...           0      0   \n",
       "3     1637.500000        0.0       2.0  2016       ...           0      0   \n",
       "4      670.000000       -3.0       5.0  2016       ...           0      0   \n",
       "\n",
       "   war  washer  water  wheelchair  wifi  windows  work  interest_level  \n",
       "0    0       0      0           0     0        0     0               1  \n",
       "1    0       0      0           0     0        0     0               2  \n",
       "2    0       0      0           0     0        0     0               0  \n",
       "3    0       0      0           0     0        0     0               2  \n",
       "4    1       0      0           0     0        0     0               2  \n",
       "\n",
       "[5 rows x 225 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the test split and keep its listing ids aside before removing\n",
    "# that column, so `test` holds feature columns only.\n",
    "# NOTE(review): predict_id is presumably used later to label predictions - confirm.\n",
    "test = pd.read_csv('RentListingInquries_FE_test.csv')\n",
    "predict_id = test['listing_id'].values\n",
    "test = test.drop(columns=['listing_id'])\n",
    "\n",
    "# Read the training split (it carries the interest_level column) and preview it.\n",
    "train = pd.read_csv('RentListingInquries_FE_train.csv')\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7185</td>\n",
       "      <td>-73.9865</td>\n",
       "      <td>2950</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7278</td>\n",
       "      <td>-74.0000</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7306</td>\n",
       "      <td>-73.9890</td>\n",
       "      <td>3758</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7109</td>\n",
       "      <td>-73.9571</td>\n",
       "      <td>3300</td>\n",
       "      <td>1650.000000</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7650</td>\n",
       "      <td>-73.9845</td>\n",
       "      <td>4900</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 224 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  latitude  longitude  price  price_bathrooms  \\\n",
       "0        1.0         1   40.7185   -73.9865   2950      1475.000000   \n",
       "1        1.0         2   40.7278   -74.0000   2850      1425.000000   \n",
       "2        1.0         1   40.7306   -73.9890   3758      1879.000000   \n",
       "3        1.0         2   40.7109   -73.9571   3300      1650.000000   \n",
       "4        2.0         2   40.7650   -73.9845   4900      1633.333333   \n",
       "\n",
       "   price_bedrooms  room_diff  room_num  Year  ...   virtual  walk  walls  war  \\\n",
       "0     1475.000000        0.0       2.0  2016  ...         0     0      0    0   \n",
       "1      950.000000       -1.0       3.0  2016  ...         0     0      0    1   \n",
       "2     1879.000000        0.0       2.0  2016  ...         0     0      0    0   \n",
       "3     1100.000000       -1.0       3.0  2016  ...         0     0      0    0   \n",
       "4     1633.333333        0.0       4.0  2016  ...         0     0      0    1   \n",
       "\n",
       "   washer  water  wheelchair  wifi  windows  work  \n",
       "0       0      0           0     0        0     0  \n",
       "1       0      0           0     0        0     0  \n",
       "2       0      0           0     0        0     0  \n",
       "3       0      0           1     0        0     0  \n",
       "4       0      0           0     0        0     0  \n",
       "\n",
       "[5 rows x 224 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the test frame.\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 49352 entries, 0 to 49351\n",
      "Columns: 225 entries, bathrooms to interest_level\n",
      "dtypes: float64(7), int64(218)\n",
      "memory usage: 84.7 MB\n"
     ]
    }
   ],
   "source": [
    "# Print a structural summary of the training frame: index range, column count,\n",
    "# dtype breakdown and memory usage.\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>49352.00000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.0</td>\n",
       "      <td>...</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.21218</td>\n",
       "      <td>1.541640</td>\n",
       "      <td>40.741545</td>\n",
       "      <td>-73.955716</td>\n",
       "      <td>3.830174e+03</td>\n",
       "      <td>1.697863e+03</td>\n",
       "      <td>1.657567e+03</td>\n",
       "      <td>-0.329460</td>\n",
       "      <td>2.753820</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003080</td>\n",
       "      <td>0.000385</td>\n",
       "      <td>0.186477</td>\n",
       "      <td>0.009361</td>\n",
       "      <td>0.000446</td>\n",
       "      <td>0.028165</td>\n",
       "      <td>0.002026</td>\n",
       "      <td>0.001013</td>\n",
       "      <td>0.000952</td>\n",
       "      <td>1.616895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.50142</td>\n",
       "      <td>1.115018</td>\n",
       "      <td>0.638535</td>\n",
       "      <td>1.177912</td>\n",
       "      <td>2.206687e+04</td>\n",
       "      <td>1.100477e+04</td>\n",
       "      <td>7.817996e+03</td>\n",
       "      <td>0.947732</td>\n",
       "      <td>1.446091</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.055412</td>\n",
       "      <td>0.019618</td>\n",
       "      <td>0.389495</td>\n",
       "      <td>0.101625</td>\n",
       "      <td>0.021109</td>\n",
       "      <td>0.165446</td>\n",
       "      <td>0.044969</td>\n",
       "      <td>0.031814</td>\n",
       "      <td>0.030846</td>\n",
       "      <td>0.626035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-118.271000</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>2.150000e+01</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>-5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.728300</td>\n",
       "      <td>-73.991700</td>\n",
       "      <td>2.500000e+03</td>\n",
       "      <td>1.225000e+03</td>\n",
       "      <td>1.066667e+03</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.751800</td>\n",
       "      <td>-73.977900</td>\n",
       "      <td>3.150000e+03</td>\n",
       "      <td>1.500000e+03</td>\n",
       "      <td>1.383417e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>40.774300</td>\n",
       "      <td>-73.954800</td>\n",
       "      <td>4.100000e+03</td>\n",
       "      <td>1.850000e+03</td>\n",
       "      <td>1.962500e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>10.00000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>44.883500</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.490000e+06</td>\n",
       "      <td>2.245000e+06</td>\n",
       "      <td>1.496667e+06</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>13.500000</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 225 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         bathrooms      bedrooms      latitude     longitude         price  \\\n",
       "count  49352.00000  49352.000000  49352.000000  49352.000000  4.935200e+04   \n",
       "mean       1.21218      1.541640     40.741545    -73.955716  3.830174e+03   \n",
       "std        0.50142      1.115018      0.638535      1.177912  2.206687e+04   \n",
       "min        0.00000      0.000000      0.000000   -118.271000  4.300000e+01   \n",
       "25%        1.00000      1.000000     40.728300    -73.991700  2.500000e+03   \n",
       "50%        1.00000      1.000000     40.751800    -73.977900  3.150000e+03   \n",
       "75%        1.00000      2.000000     40.774300    -73.954800  4.100000e+03   \n",
       "max       10.00000      8.000000     44.883500      0.000000  4.490000e+06   \n",
       "\n",
       "       price_bathrooms  price_bedrooms     room_diff      room_num     Year  \\\n",
       "count     4.935200e+04    4.935200e+04  49352.000000  49352.000000  49352.0   \n",
       "mean      1.697863e+03    1.657567e+03     -0.329460      2.753820   2016.0   \n",
       "std       1.100477e+04    7.817996e+03      0.947732      1.446091      0.0   \n",
       "min       2.150000e+01    4.300000e+01     -5.000000      0.000000   2016.0   \n",
       "25%       1.225000e+03    1.066667e+03     -1.000000      2.000000   2016.0   \n",
       "50%       1.500000e+03    1.383417e+03      0.000000      2.000000   2016.0   \n",
       "75%       1.850000e+03    1.962500e+03      0.000000      4.000000   2016.0   \n",
       "max       2.245000e+06    1.496667e+06      8.000000     13.500000   2016.0   \n",
       "\n",
       "            ...                walk         walls           war        washer  \\\n",
       "count       ...        49352.000000  49352.000000  49352.000000  49352.000000   \n",
       "mean        ...            0.003080      0.000385      0.186477      0.009361   \n",
       "std         ...            0.055412      0.019618      0.389495      0.101625   \n",
       "min         ...            0.000000      0.000000      0.000000      0.000000   \n",
       "25%         ...            0.000000      0.000000      0.000000      0.000000   \n",
       "50%         ...            0.000000      0.000000      0.000000      0.000000   \n",
       "75%         ...            0.000000      0.000000      0.000000      0.000000   \n",
       "max         ...            1.000000      1.000000      1.000000      2.000000   \n",
       "\n",
       "              water    wheelchair          wifi       windows          work  \\\n",
       "count  49352.000000  49352.000000  49352.000000  49352.000000  49352.000000   \n",
       "mean       0.000446      0.028165      0.002026      0.001013      0.000952   \n",
       "std        0.021109      0.165446      0.044969      0.031814      0.030846   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "max        1.000000      1.000000      1.000000      1.000000      1.000000   \n",
       "\n",
       "       interest_level  \n",
       "count    49352.000000  \n",
       "mean         1.616895  \n",
       "std          0.626035  \n",
       "min          0.000000  \n",
       "25%          1.000000  \n",
       "50%          2.000000  \n",
       "75%          2.000000  \n",
       "max          2.000000  \n",
       "\n",
       "[8 rows x 225 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column summary statistics (count, mean, std, min, quartiles, max)\n",
    "# for the training frame.\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAELCAYAAAARNxsIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGq9JREFUeJzt3XvUXXV95/H3h3CRVm2gpAzDpaGajo12GjVCpnR1LCoEXDXUhQozlZRhGV2CxVWnA1qVVqQtbdVVZpSZdBmFLitQvJA6wTSlqKMtl4AIBGRI8UIyCJGLXFxiwe/8cX6PHMKT59mJ++RwyPu11l5n7+/+/fb+njwLvmvv32/vk6pCkqQ+7DbuBCRJzxwWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1BuLiiSpN7uPO4Gdbb/99qv58+ePOw1JmijXXXfdd6tq3mztdrmiMn/+fNavXz/uNCRpoiT5Vpd23v6SJPVmZEUlybOSXJPka0k2JPmjFv94km8kuaEti1o8Sc5LsjHJjUleMnSs5Ulub8vyofhLk9zU+pyXJKP6PpKk2Y3y9tejwJFV9XCSPYAvJ7m87fv9qrp0q/bHAAvacjhwPnB4kn2Bs4DFQAHXJVldVfe3Nm8CrgbWAEuBy5EkjcXIrlRq4OG2uUdbZnrP/jLgwtbvKmBukgOAo4F1VXVfKyTrgKVt33Or6qoavL//QuC4UX0fSdLsRjqmkmROkhuAexgUhqvbrnPaLa4PJdmrxQ4E7hzqvqnFZopvmiY+XR4rkqxPsn7Lli0/8feSJE1vpEWlqh6vqkXAQcBhSV4EvBN4AfAyYF/gjFHm0PJYWVWLq2rxvHmzzoiTJO2gnTL7q6oeAK4EllbVXe0W16PAx4DDWrPNwMFD3Q5qsZniB00TlySNyShnf81LMret7w28Cvh6GwuhzdQ6Dri5dVkNnNRmgS0BvldVdwFrgaOS7JNkH+AoYG3b92CSJe1YJwGXjer7SJJmN8rZXwcAFySZw6B4XVJVn0vyj0nmAQFuAN7S2q8BjgU2At8HTgaoqvuSnA1c29q9r6rua+tvBT4O7M1g1pczvyRpjDKYOLXrWLx4cflEvfT0dsR/P2LcKTzjfeVtX9mu9kmuq6rFs7XziXpJUm8sKpKk3lhUJEm9sahIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1BuLiiSpNxYVSVJvLCqSpN5YVCRJvbGoSJJ6Y1GRJPVmZEUlybOSXJPka0k2JPmjFj80ydVJNia5OMmeLb5X297Y9s8fOtY7W/y2JEcPxZe22MYkZ47qu0iSuhnllcqjwJFV9SvAImBpkiXAucCHqur5wP3AKa39KcD9Lf6h1o4kC4ETgBcCS4GPJJmTZA7wYeAYYCFwYmsrSRqTkRWVGni4be7RlgKOBC5t8QuA49r6srZN2/+KJGnxi6rq0ar6BrAROKwtG6vqjqr6IXBRaytJGpORjqm0K4obgHuAdcC/AA9U1WOtySbgwLZ+IHAnQNv/PeBnh+Nb9dlWXJI0JiMtKlX1eFUtAg5icGXxglGeb1uSrEiyPsn6LVu2jCMFSdol7JTZX1X1AHAl8B+AuUl2b7sOAja39c3AwQBt/88A9w7Ht+qzrfh0519ZVYuravG8efN6+U6SpKca5eyveUnmtvW9gVcBtzIoLse3ZsuBy9r66rZN2/+PVVUtfkKbHXYosAC4BrgWWNBmk+3JYDB/9ai+jyRpdrvP3mSHHQBc0GZp7QZcUlWfS3ILcFGS9wNfBT7a2n8U+OskG4H7GBQJqmpDkkuAW4DHgFOr6nGAJKcBa4E5wKqq2jDC7yNJmsXIikpV3Qi8eJr4HQzGV7aO/wB43TaOdQ5wzjTxNcCanzhZSVIvfKJektQbi4okqTcW
FUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1BuLiiSpNxYVSVJvLCqSpN5YVCRJvbGoSJJ6Y1GRJPVm1qKS5BeTXJHk5rb975O8e/SpSZImTZcrlb8C3gn8K/z4x7dOGGVSkqTJ1KWo/FRVXbNV7LFRJCNJmmxdisp3kzwPKIAkxwN3jTQrSdJE6vIb9acCK4EXJNkMfAP47ZFmJUmaSLMWlaq6A3hlkp8Gdquqh0afliRpEnWZ/fXHSeZW1SNV9VCSfZK8v0O/g5NcmeSWJBuSnN7if5hkc5Ib2nLsUJ93JtmY5LYkRw/Fl7bYxiRnDsUPTXJ1i1+cZM/t/yeQJPWly5jKMVX1wNRGVd0PHDtD+ymPAe+oqoXAEuDUJAvbvg9V1aK2rAFo+04AXggsBT6SZE6SOcCHgWOAhcCJQ8c5tx3r+cD9wCkd8pIkjUiXojInyV5TG0n2BvaaoT0AVXVXVV3f1h8CbgUOnKHLMuCiqnq0qr4BbAQOa8vGqrqjqn4IXAQsSxLgSODS1v8C4LgO30eSNCJdisongCuSnJLkFGAdg/+Bd5ZkPvBi4OoWOi3JjUlWJdmnxQ4E7hzqtqnFthX/WeCBqnpsq7gkaUxmLSpVdS5wDvBLbTm7qv6s6wmSPBv4FPD2qnoQOB94HrCIwdTkD+xA3tslyYok65Os37Jly6hPJ0m7rC5Tiqmqy4HLt/fgSfZgUFA+UVWfbse6e2j/XwGfa5ubgYOHuh/UYmwjfi8wN8nu7WpluP3W+a9kMC2axYsX1/Z+D0lSN11mf702ye1JvpfkwSQPJXmwQ78AHwVuraoPDsUPGGr2W8DNbX01cEKSvZIcCiwArgGuBRa0mV57MhjMX11VBVwJHN/6Lwcumy0vSdLodLlS+TPgN6vq1u089hHAG4GbktzQYu9iMHtrEYMn9L8JvBmgqjYkuQS4hcHMsVOr6nGAJKcBa4E5wKqq2tCOdwZwUZvi/FUGRUySNCZdisrdO1BQqKovA5lm15oZ+pzDYPxm6/ia6fq1BzMP297cJEmj0aWorE9yMfBZ4NGp4NQYiSRJU7oUlecC3weOGooVYFGRJD1Jl3d/nbwzEpEkTb5Zi0qSZzF4/ckLgWdNxavqv4wwL0nSBOryRP1fA/8GOBr4IoPnQXxTsSTpKboUledX1XuAR6rqAuDVwOGjTUuSNIm6FJV/bZ8PJHkR8DPAz40uJUnSpOoy+2tle+njuxk89f5s4D0jzUqSNJFmLCpJdgMebL+h8iXgF3ZKVpKkiTTj7a+q+hHw33ZSLpKkCddlTOUfkvzX9vPA+04tI89MkjRxuoypvKF9njoUK7wVJknaSpcn6g/dGYlIkiZflyfqT5ouXlUX9p+OJGmSdbn99bKh9WcBrwCuBywqkqQn6XL7623D20nmAheNLCNJ0sTqMvtra48AjrNIkp6iy5jK3zGY7QWDIrQQuGSUSUmSJlOXMZW/GFp/DPhWVW0aUT6SpAnWpah8G7irqn4AkGTvJPOr6psjzUySNHG6jKn8LfCjoe3HW0ySpCfpUlR2r6ofTm209T1n69Re63JlkluSbEhyeovvm2Rdktvb5z4tniTnJdmY5MYkLxk61vLW/vYky4fiL01yU+tzXpJsz5eXJPWrS1HZkuQ1UxtJlgHf7dDvMeAdVbUQWAKcmmQhcCZwRVUtAK5o2wDHAAvasgI4v51vX+AsBj8Mdhhw1lQham3eNNRvaYe8JEkj0qWovAV4V5JvJ/k2cAbw5tk6VdVdVXV9W38IuBU4EFgGXNCaXQAc19aXARfWwFXA3CQHMPgZ43VVdV97Bf86YGnb99yquqqqisHDmFPHkiSNQZeHH/8FWJLk2W374e09SZL5wIuBq4H9q+qutus7wP5t/UDgzqFum1pspvimaeKSpDGZ9UolyR8nmVtVD1fVw0n2SfL+ridoxehTwNur6sHhfe0Ko6bt2KMkK5KsT7J+y5Ytoz6dJO2yutz+
OqaqHpjaaLegju1y8CR7MCgon6iqT7fw3e3WFe3znhbfDBw81P2gFpspftA08aeoqpVVtbiqFs+bN69L6pKkHdClqMxJstfURpK9gb1maD/VLsBHgVur6oNDu1YDUzO4lgOXDcVParPAlgDfa7fJ1gJHtSukfYCjgLVt34NJlrRznTR0LEnSGHR5+PETwBVJPta2T+aJgfaZHAG8EbgpyQ0t9i7gT4FLkpwCfAt4fdu3hsEV0Ebg++08VNV9Sc4Grm3t3ldV97X1twIfB/YGLm+LJGlMugzUn5vka8ArW+jsqlrbod+XgW09N/KKadoXT/51yeF9q4BV08TXAy+aLRdJ0s7R5UoF4KvAHgwG1b86unQkSZOsy+yv1wPXAMczuFV1dZLjR52YJGnydLlS+QPgZVV1D0CSecA/AJeOMjFJ0uTpMvtrt6mC0tzbsZ8kaRfT5Url80nWAp9s229gMFNLkqQn6TL76/eTvBb4tRZaWVWfGW1akqRJ1Gn2V3sa/tOzNpQk7dIcG5Ek9caiIknqzTaLSpIr2ue5Oy8dSdIkm2lM5YAkvwq8JslFbPXKlakf4JIkacpMReW9wHsYvFL+g1vtK+DIUSUlSZpM2ywqVXUpcGmS91TV2TsxJ0nShOrynMrZSV4D/HoLfaGqPjfatCRJk6jLCyX/BDgduKUtpyf541EnJkmaPF0efnw1sKiqfgSQ5AIGr79/1ygTkyRNnq7PqcwdWv+ZUSQiSZp8Xa5U/gT4apIrGUwr/nXgzJFmJUmaSF0G6j+Z5AvAy1rojKr6zkizkiRNpK4vlLwLWD3iXCRJE853f0mSemNRkST1ZsaikmROkq/vyIGTrEpyT5Kbh2J/mGRzkhvacuzQvncm2ZjktiRHD8WXttjGJGcOxQ9NcnWLX5xkzx3JU5LUnxmLSlU9DtyW5JAdOPbHgaXTxD9UVYvasgYgyULgBOCFrc9HWkGbA3wYOAZYCJzY2gKc2471fOB+4JQdyFGS1KMuA/X7ABuSXAM8MhWsqtfM1KmqvpRkfsc8lgEXVdWjwDeSbAQOa/s2VtUdAO1tycuS3MrghZb/qbW5APhD4PyO55MkjUCXovKens95WpKTgPXAO6rqfuBA4KqhNptaDODOreKHAz8LPFBVj03T/imSrABWABxyyI5cdEmSuph1oL6qvgh8E9ijrV8L7OhvqZwPPA9YBNwFfGAHj7NdqmplVS2uqsXz5s3bGaeUpF1SlxdKvgm4FPhfLXQg8NkdOVlV3V1Vj7f3iP0VT9zi2gwcPNT0oBbbVvxeYG6S3beKS5LGqMuU4lOBI4AHAarqduDnduRkSQ4Y2vwtYGpm2GrghCR7JTkUWABcw+CqaEGb6bUng8H81VVVwJXA8a3/cuCyHclJktSfLmMqj1bVD5PBrwm3q4OarVOSTwIvB/ZLsgk4C3h5kkWt/zeBNwNU1YYklzB4tf5jwKlt5hlJTgPWAnOAVVW1oZ3iDOCiJO9n8Nbkj3b5wpKk0elSVL6Y5F3A3kleBbwV+LvZOlXVidOEt/k//qo6BzhnmvgaYM008Tt44vaZJOlpoMvtrzOBLcBNDK4s1gDvHmVSkqTJ1OUtxT9qP8x1NYPbVre1MQ1Jkp5k1qKS5NXA/wT+hcHvqRya5M1Vdfmok5MkTZYuYyofAH6jqjYCJHke8L8Bi4ok6Um6jKk8NFVQmjuAh0aUjyRpgm3zSiXJa9vq+iRrgEsYjKm8jsHzI5IkPclMt79+c2j9buA/tvUtwN4jy0iSNLG2WVSq6uSdmYgkafJ1mf11KPA2YP5w+9lefS9J2vV0mf31WQZPwv8d8KPRpiNJmmRdisoPquq8kWciSZp4XYrKXyY5C/h74NGpYFXt6G+qSJKeoboUlV8G3sjg53unbn9V25Yk6ce6FJXXAb9QVT8cdTKSpMnW5Yn6m4G5o05EkjT5ulypzAW+nuRanjym4pRiSdKTdCkqZ408C0nSM0KX31P54s5IROrT
t9/3y+NOYZdwyHtvGncKeprp8kT9Qzzxm/R7AnsAj1TVc0eZmCRp8nS5UnnO1HqSAMuAJaNMSpI0mbrM/vqxGvgscPSI8pEkTbBZi0qS1w4txyf5U+AHHfqtSnJPkpuHYvsmWZfk9va5T4snyXlJNia5MclLhvosb+1vT7J8KP7SJDe1Pue1qyhJ0hh1uVL5zaHlaAa/+risQ7+PA0u3ip0JXFFVC4Ar2jbAMcCCtqwAzodBEWIw++xw4DDgrKlC1Nq8aajf1ueSJO1kXcZUduh3VarqS0nmbxVeBry8rV8AfAE4o8UvrKoCrkoyN8kBre26qroPIMk6YGmSLwDPraqrWvxC4Djg8h3JVZLUj5l+Tvi9M/Srqjp7B863f1Xd1da/A+zf1g8E7hxqt6nFZopvmiY+rSQrGFwBccghh+xA2pKkLma6/fXINAvAKQyuLn4i7aqkZm3Yg6paWVWLq2rxvHnzdsYpJWmXNNPPCX9gaj3Jc4DTgZOBi4APbKvfLO5OckBV3dVub93T4puBg4faHdRim3nidtlU/AstftA07SVJYzTjQH2brfV+4EYGBeglVXVGVd0zU78ZrAamZnAtBy4bip/UZoEtAb7XbpOtBY5Ksk8boD8KWNv2PZhkSZv1ddLQsSRJYzLTmMqfA68FVgK/XFUPb8+Bk3ySwVXGfkk2MZjF9afAJUlOAb4FvL41XwMcC2wEvs/gioiqui/J2cC1rd37pgbtgbcymGG2N4MBegfpJWnMZpr99Q4GbyV+N/AHQ4+BhMGQyIyvaamqE7ex6xXTtC3g1G0cZxWwapr4euBFM+UgSdq5ZhpT2a6n7SVJsnBIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1BuLiiSpNxYVSVJvLCqSpN5YVCRJvbGoSJJ6Y1GRJPXGoiJJ6o1FRZLUm7EUlSTfTHJTkhuSrG+xfZOsS3J7+9ynxZPkvCQbk9yY5CVDx1ne2t+eZPk4vosk6QnjvFL5japaVFWL2/aZwBVVtQC4om0DHAMsaMsK4HwYFCHgLOBw4DDgrKlCJEkaj6fT7a9lwAVt/QLguKH4hTVwFTA3yQHA0cC6qrqvqu4H1gFLd3bSkqQnjKuoFPD3Sa5LsqLF9q+qu9r6d4D92/qBwJ1DfTe12LbikqQx2X1M5/21qtqc5OeAdUm+PryzqipJ9XWyVrhWABxyyCF9HVaStJWxXKlU1eb2eQ/wGQZjIne321q0z3ta883AwUPdD2qxbcWnO9/KqlpcVYvnzZvX51eRJA3Z6UUlyU8nec7UOnAUcDOwGpiawbUcuKytrwZOarPAlgDfa7fJ1gJHJdmnDdAf1WKSpDEZx+2v/YHPJJk6/99U1eeTXAtckuQU4FvA61v7NcCxwEbg+8DJAFV1X5KzgWtbu/dV1X0772tIkra204tKVd0B/Mo08XuBV0wTL+DUbRxrFbCq7xwlSTvm6TSlWJI04SwqkqTejGtK8UR46e9fOO4UnvGu+/OTxp2CpB55pSJJ6o1FRZLUG4uKJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1JuJLypJlia5LcnGJGeOOx9J2pVNdFFJMgf4MHAMsBA4McnC8WYlSbuuiS4qwGHAxqq6o6p+CFwELBtzTpK0y5r0onIgcOfQ9qYWkySNwe7jTmBnSLICWNE2H05y2zjzGbH9gO+OO4mu8hfLx53C08lE/e0AOCvjzuDpZKL+fvnd7f7b/XyXRpNeVDYDBw9tH9RiT1JVK4GVOyupcUqyvqoWjzsPbT//dpPNv9/ApN/+uhZYkOTQJHsCJwCrx5yTJO2yJvpKpaoeS3IasBaYA6yqqg1jTkuSdlkTXVQAqmoNsGbceTyN7BK3+Z6h/NtN
Nv9+QKpq3DlIkp4hJn1MRZL0NGJReYbwdTWTK8mqJPckuXncuWj7JDk4yZVJbkmyIcnp485p3Lz99QzQXlfzf4FXMXgA9FrgxKq6ZayJqZMkvw48DFxYVS8adz7qLskBwAFVdX2S5wDXAcftyv/teaXyzODraiZYVX0JuG/ceWj7VdVdVXV9W38IuJVd/K0eFpVnBl9XI41ZkvnAi4Grx5vJeFlUJOknlOTZwKeAt1fVg+POZ5wsKs8MnV5XI6l/SfZgUFA+UVWfHnc+42ZReWbwdTXSGCQJ8FHg1qr64LjzeTqwqDwDVNVjwNTram4FLvF1NZMjySeBfwb+XZJNSU4Zd07q7AjgjcCRSW5oy7HjTmqcnFIsSeqNVyqSpN5YVCRJvbGoSJJ6Y1GRJPXGoiJJ6o1FRZLUG4uKBCT5pw5t3p7kp0acx6LZnnNI8jtJ/kfP5+39mNo1WVQkoKp+tUOztwPbVVTazxJsj0XALv3wnCabRUUCkjzcPl+e5AtJLk3y9SSfyMDvAv8WuDLJla3tUUn+Ocn1Sf62vVSQJN9Mcm6S64HXJXleks8nuS7J/0nygtbudUluTvK1JF9qr9h5H/CG9mT2GzrkPS/Jp5Jc25YjkuzWcpg71O72JPtP1773f0zt0nYfdwLS09CLgRcC/w/4CnBEVZ2X5PeA36iq7ybZD3g38MqqeiTJGcDvMSgKAPdW1UsAklwBvKWqbk9yOPAR4EjgvcDRVbU5ydyq+mGS9wKLq+q0jrn+JfChqvpykkOAtVX1S0kuA34L+Fg757eq6u4kf7N1e+CXfsJ/L+nHLCrSU11TVZsAktwAzAe+vFWbJcBC4CuDdwqyJ4P3d025uPV/NvCrwN+2dgB7tc+vAB9Pcgmwo2+3fSWwcOjYz23nvJhB0foYgxeMXjxLe6kXFhXpqR4dWn+c6f87CbCuqk7cxjEeaZ+7AQ9U1aKtG1TVW9pVxKuB65K8dAdy3Q1YUlU/eFJyyT8Dz08yDzgOeP8s7Xfg1NJTOaYidfcQ8Jy2fhVwRJLnAyT56SS/uHWH9oNN30jyutYuSX6lrT+vqq6uqvcCWxj8Js7wObr4e+BtUxtJFrXzFvAZ4IMMXst+70ztpb5YVKTuVgKfT3JlVW0Bfgf4ZJIbGdz6esE2+v1n4JQkXwM2AMta/M+T3JTkZuCfgK8BVzK4PdVpoB74XWBxkhuT3AK8ZWjfxcBv88Str9naSz8xX30vSeqNVyqSpN44UC89TSU5GTh9q/BXqurUceQjdeHtL0lSb7z9JUnqjUVFktQbi4okqTcWFUlSbywqkqTe/H/CcP1LB6IrVQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x12227e748>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(train.interest_level);\n",
    "plt.xlabel('interest_level');\n",
    "plt.ylabel('Number of occurance');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = train['interest_level']\n",
    "train = train.drop('interest_level', axis=1)\n",
    "X_train = np.array(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/model_selection/_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, train_size=0.8, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#default SVC\n",
    "from sklearn.svm import LinearSVC\n",
    "SVC1 = LinearSVC().fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification report for classifier LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
      "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
      "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
      "     verbose=0):\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.00      0.00      0.00       753\n",
      "          1       0.30      0.86      0.44      2221\n",
      "          2       0.89      0.46      0.60      6897\n",
      "\n",
      "avg / total       0.69      0.51      0.52      9871\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[   0  674   79]\n",
      " [   0 1901  320]\n",
      " [   0 3752 3145]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "y_predict = SVC1.predict(X_test)\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\"\n",
    "      % (SVC1, classification_report(y_test, y_predict)))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_grid_point_Linear(C, X_train, y_train, X_test, y_test):\n",
    "    SVC2 = LinearSVC(C=C)\n",
    "    SVC2 = SVC2.fit(X_train, y_train)\n",
    "    accuracy = SVC2.score(X_test, y_test)\n",
    "    print('accuracy:{}'.format(accuracy))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy:0.6737919157126937\n",
      "accuracy:0.6416776415763347\n",
      "accuracy:0.6989160166143248\n",
      "accuracy:0.5802856853408975\n",
      "accuracy:0.5294296423867896\n",
      "accuracy:0.5631648262587378\n",
      "accuracy:0.5841353459629217\n"
     ]
    }
   ],
   "source": [
    "C_s = np.logspace(-3, 3, 7)\n",
    "\n",
    "accuracy_s = []\n",
    "for i, oneC in enumerate(C_s):\n",
    "    tmp = fit_grid_point_Linear(oneC, X_train, y_train, X_test, y_test)\n",
    "    accuracy_s.append(tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEKCAYAAAA4t9PUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmYFNX1//H3YROiEkUw4gZGQUVR1AGNK+4YkUVQNmk0LjHGoElUUOOGe2Li9uOrEnfZRUEUFDHBJSjKgLgAiiMugBuguCACI+f3x63RdhyYZmaqq7vn83qefuiurqo+Bdqn695z7zV3R0REpKrqJB2AiIjkNyUSERGpFiUSERGpFiUSERGpFiUSERGpFiUSERGpFiUSERGpFiUSERGpFiUSERGplnpJB5ANTZs29ZYtWyYdhohIXpk1a9Yyd29W2X61IpG0bNmS4uLipMMQEckrZvZBJvupaUtERKpFiURERKpFiURERKol1kRiZp3M7G0zKzGzwRW8f7OZzYkeC8xsRdp7A8zsnegxIG37fmb2RnTO28zM4rwGERHZsNg6282sLjAUOBpYDMw0s4nuPq9sH3f/c9r+fwL2iZ43Aa4AigAHZkXHfgHcAZwJvAxMBjoBT8Z1HSIismFx3pF0AErcfaG7rwFGA103sH8fYFT0/Fhgqrt/HiWPqUAnM2sONHb3GR5W5HoQ6BbfJYiISGXiTCTbAYvSXi+Otv2MmbUAdgL+W8mx20XPKz2niIhkR650tvcGxrn79zV1QjM7y8yKzax46dKlNXVaSdjKlfDQQ7BmTdKRiEiZOBPJEmCHtNfbR9sq0psfm7U2dOyS6Hml53T3Ye5e5O5FzZpVOjBT8oA7nHUWpFLw4INJRyMiZeJMJDOBVma2k5k1ICSLieV3MrPdgC2Bl9I2TwGOMbMtzWxL4Bhgirt/DHxlZgdE1Vop4LEYr0FyyLBhMHIk1K+vRCKSS2JLJO5eCpxLSArzgbHuPtfMhphZl7RdewOjo87zsmM/B64mJKOZwJBoG8A5wN1ACfAuqtiqFWbPhoED4dhj4fLL4YUXYOHCpKMSEQBL+/4uWEVFRa65tvLXihWw336hX+TVV+Hbb6FFC7jqqpBURCQeZjbL3Ysq2y9XOttFKuQOv/sdfPghjBkDTZvCjjvC4YeHTvda8DtIJOcpkUhOu+UWGD8ebrwRDjzwx+2pFJSUwIwZycUmIoESieSsl16Ciy6Cbt3gz3/+6Xs9ekCjRup0F8kFSiSSk5Ytg5NPDs1Y990H5WdU23xz6N4dRo+G1auTiVFEAiUSyTnr1kH//vDZZ/Dww7DFFhXvl0qFjvgnnshufCLyU0okknOuvx6eegpuvRX23Xf9+x15JDRvruYtkaQpkUhOmTYtlPT27Qu///2G961XD/r1g8mTQbPgiCRHiURyxscfQ58+0Lo13HXXz/tFKpJKQWlpKA0WkWQokUhOKC0NSeSrr2DcONhss8yOa9sW2rVT85ZIkpRIJCdccQU89xzceSfsscfGHZtKwcyZMH9+PLGJyIYpkUjiJk+G666DM84ISWFj9ekDdeqEke4ikn1KJJKoDz8Mpb577w233Va1c2yzTZjM8aGHQumwiGSXEokkZs2aMOhw7dowXqRRo6qfK5WCxYvh2WdrLDwRyZASiSRm0CB4+WW4915o1ap65+raFRo3Vqe7SBKUSCQRjzwSJmQcOBB69qz++Ro1gpNOCuddubL65xORzCmRSNaVlISp4Tt0gH/8o+bOm0rBN9/AhAk1d04RqZwSiWTVqlXhzqFuXRg7Fho0qLlzH3wwtGyp5i2RbFMikaw6/3yYMydUWLVoUbPnrlMHTjkFnnkGliyp2XOLyPopkUjWDB8Ow4bB4MFw/PHxfEb//qEEeOTIeM4vIj+nRCJZMW9emITx0EPh6qvj+5zWreGAA0LzlpbhFcmOWBOJmXUy
s7fNrMTMBq9nn5PNbJ6ZzTWzkdG2w81sTtrjOzPrFr13v5m9l/ZeuzivQarvm29CZdZmm4WFqOrVi/fzUil480147bV4P0dEgtgSiZnVBYYCxwFtgD5m1qbcPq2Ai4GD3H0P4HwAd5/m7u3cvR1wBPAt8HTaoReWve/uc+K6Bqk+dzj7bHjrLRg1KqwfErdevaB+fXW6i2RLnHckHYASd1/o7muA0UDXcvucCQx19y8A3P2zCs7TE3jS3b+NMVaJyb//DSNGwFVXwRFHZOczmzSBE04In1tamp3PFKnN4kwk2wGL0l4vjralaw20NrPpZjbDzDpVcJ7ewKhy2641s9fN7GYz26TmQpaaNHt2GHB47LFw6aXZ/exUKizV+/TTle8rItWTdGd7PaAV0BHoA/zbzH5YodvMmgNtgSlpx1wM7Aa0B5oAgyo6sZmdZWbFZla8tIrL5y1YAB98UKVDa70vvwzjRZo2DaW+dbL8X9pxx8FWW6l5SyQb4vzfewmwQ9rr7aNt6RYDE919rbu/BywgJJYyJwPj3X1t2QZ3/9iD1cB9hCa0n3H3Ye5e5O5FzZo12+jg3cPo63btYPz4jT68VnOH004LM/uOHQtV+OuvtgYNoHfvMMp9xYrsf75IbRJnIpkJtDKzncysAaGJamK5fSYQ7kYws6aEpq6Fae/3oVyzVnSXgpkZ0A14M47gzeCBB2CXXeDEE+Hcc+G77+L4pMJz660h+d54Ixx4YHJxpFKwenVYcVFE4hNbInH3UuBcQrPUfGCsu881syFm1iXabQqw3MzmAdMI1VjLAcysJeGO5rlypx5hZm8AbwBNgWviuoadd4bp0+HPf4ahQ+E3vwnNXbJ+L70EF14I3bqFv7cktW8Pu+6qBa9E4mZeC0ZtFRUVeXFxcbXO8cQTMGBA+IV7551hKg75qWXLYJ99Qunt7NmwxRaVHxO3664LHf0LF8JOOyUdjUh+MbNZ7l5U2X5Jd7bnjc6dwwC3ffcN03CcemoYaCfBunU/Vko9/HBuJBH4MeEPH55sHCKFTIlkI2y/Pfz3v3DZZaEaqH17eP31pKPKDTfcAE8+GfpH9tsv6Wh+tOOOcPjhmjJFJE5KJBupXj0YMiTMMLtiRVhT4847a/eX1LRpIbn26RPm08o1/fuHNVBmzEg6EpHCpERSRUccEZq6OnaEP/whrD1eG8tMP/kkJJDWrcPMvmZJR/RzPXqEFRQ1pkQkHkok1bD11jB5cihznTAhdDS//HLSUWVPaWlIIl99FfpFNtss6Ygq1rgxdO8OY8aEYgkRqVlKJNVUpw5cdBG88EJo3jr4YLjpptD5XOiuvBKefRbuuAP23DPpaDYslYIvvoBJk5KORKTwKJHUkAMOgFdfhS5dwjiKzp2hijOz5IUnn4Rrr4XTTw9l0bnuyCPDzMNq3hKpeUokNWjLLcMo6qFDQ3VXu3bhF3uhWbQolNXutRfcfnvS0WSmXj3o1y/ckSxblnQ0IoVFiaSGmcE554QKoc02C7+Er7wSvv8+6chqxpo1obBg7dqQNBs1SjqizKVSoV9n9OikIxEpLEokMWnXDmbNCr/cr7oqJJQl5aeszEODBoUkec890KpV5fvnkrZtYe+91bwlUtOUSGK02WZh4scHHoDi4pBcJk9OOqqqe/RRuOUW+NOfwhTx+SiVgpkzYf78pCMRKRxKJFmQSoW7k223heOPhwsuCE1E+aSkJEwN36FDqErLV337hko7TeQoUnOUSLJk113DGJNzzoF//jOUCS9cWPlxueC778IdSN26YX2RBg2SjqjqttkmrNg4fHjtKNEWyQYlkixq2DBUdI0bF6aj32efMJAv1513HsyZE37Ft2iRdDTVl0qFyrPnyi9QICJVokSSgB49whfz7ruHCqizz4ZVq5KOqmLDh4epTwYPDs1yhaBr1zDaXZ3uIjVDiSQhLVuG0fAXXQR33QX77597HcDz5oVJGA89FK6+Ouloak6jRqGpbtw4
WLky6WhE8p8SSYLq1w/zdE2eDB9/DEVFcN99uTGT8MqV0LNnqDwbNSoM6Csk/fuH9WQmTEg6EpH8p0SSA447LswkvP/+8LvfhS+5r79OLh730Nz21lswcmSoNis0hxwS+nvUvCVSfUokOWLbbWHq1DB4cdSosDjUq68mE8vdd4e+kbKBlIWoTp2QsJ95Bj76KOloRPKbEkkOqVsXLr88zNO1cmWYCPL227Pb1PXqq2HA4THHhLXOC1n//qEEeOTIpCMRyW9KJDnosMNCU9fRR8PAgXDiifD55/F/7pdfhk7opk3DHUmdAv+vo3XrkKwfeCA3+qVE8lWsXxVm1snM3jazEjMbvJ59TjazeWY218xGpm3/3szmRI+Jadt3MrOXo3OOMbM8Hh63fk2bwuOPh8GLkyaFMScvvhjf57mH/pn33w8LQDVrFt9n5ZJUCt58MyRuEama2BKJmdUFhgLHAW2APmbWptw+rYCLgYPcfQ/g/LS3V7l7u+jRJW37jcDN7r4L8AVwelzXkDQz+MtfYPr00Ox16KFw/fXxjMi+9dYwl9aNN8JBB9X8+XNVr16hek6d7iJVF+cdSQegxN0XuvsaYDTQtdw+ZwJD3f0LAHf/bEMnNDMDjgDGRZseALrVaNQ5qH370HfRowdccgl06gSfflpz558xIyzG1bVrSFy1SZMmYRGyESPCFPMisvHiTCTbAYvSXi+OtqVrDbQ2s+lmNsPMOqW919DMiqPtZcliK2CFu5f9L1/ROQEws7Oi44uXFsBShb/8ZVhH4667wkDGvfcOFUfVtXx5GF2/ww5w//3hLqi2SaXgs8/g6aeTjkQkPyXdnVoPaAV0BPoA/zazLaL3Wrh7EdAXuMXMdt6YE7v7MHcvcveiZgXS4G8GZ50Fr7wSfkkfcwz87W9V/yW9bl2oXPr00zDn1xZbVH5MIfrtb8Pfp5q3RKomzkSyBNgh7fX20bZ0i4GJ7r7W3d8DFhASC+6+JPpzIfAssA+wHNjCzOpt4JwFr23bsKbGaaeFddM7dgyTEG6sG24Ia6/fcksYt1JbNWgAffrAY4+FyjUR2ThxJpKZQKuoyqoB0BuYWG6fCYS7EcysKaGpa6GZbWlmm6RtPwiY5+4OTAN6RscPAB6L8Rpy1qabhlUKR4wIFUd77w0Ty//tbsCzz8Jll4Uv0LPPji3MvJFKhenyx42rfF8R+anYEknUj3EuMAWYD4x197lmNsTMyqqwpgDLzWweIUFc6O7Lgd2BYjN7Ldp+g7vPi44ZBPzFzEoIfSb3xHUN+aBvX5g9O0wC2bVrmPJ99eoNH/PJJ9C7d1gq9667ame/SHnt24c1Y9S8JbLxzGvBSKyioiIvLi5OOoxYrV4dZhK+7TbYd98wFmSXXX6+3/ffw1FHhUW2XnkF9twz+7HmquuuC6P5Fy6EnXZKOhqR5JnZrKiveoOS7myXGrLJJmEsyIQJ8N57IZmMGvXz/a64IjRr3XGHkkh5/fqFP4cPTzYOkXyjRFJgunYNi2a1bRuavU4//cc1N558MnTOn346DBiQbJy5qEWLULjw4IOaMkVkYyiRFKAddwx3HZdcEtY3ad8ennoqlPrutVeYCFIqlkpBSUkYpCkimVEiKVD164e7jylTwoSPxx0Ha9aE8SKNGiUdXe7q0SP8/Tz0UNKRiOQPJZICd/TRoanrtNNCB3zr1klHlNsaN4bu3cMsApVVv4lIoERSC2yzDdx7b7grkcqlUvDFF2HWZRGpnBKJSDlHHgnNm2tMiUimlEhEyqlXL1S8TZoEy5YlHY1I7lMiEalAKhUmwxw9OulIRHKfEolIBfbaK8xfpuYtkcopkYisRyoVZll+662kIxHJbUokIuvRty/UqaMxJSKVUSIRWY9ttoFjjw2JZN26pKMRyV1KJCIbkEqFRcOeey7pSERylxKJyAZ07Qqbb65Od5ENUSIR2YBGjeCkk8LKiWWzKIvIT2WUSMzsUTM73syUeKTWSaXgm2/C
Wi8i8nOZJob/A/oC75jZDWa2a4wxieSUQw4Ja5WoekukYhklEnd/xt37AfsC7wPPmNmLZnaamdWPM0CRpNWpE9ZymToVPvoo6WhEck/GTVVmthVwKnAG8CpwKyGxTI0lMpEc0r9/KAEeOTLpSERyT6Z9JOOBF4BfACe4exd3H+PufwI228BxnczsbTMrMbPB69nnZDObZ2ZzzWxktK2dmb0UbXvdzHql7X+/mb1nZnOiR7uNuWCRqmjdGg44AB54QMvwipRXL8P9bnP3aRW94e5FFW03s7rAUOBoYDEw08wmuvu8tH1aARcDB7n7F2a2dfTWt0DK3d8xs22BWWY2xd1XRO9f6O7jMoxdpEb07w9//CO89hq0088XkR9k2rTVxsy2KHthZlua2TmVHNMBKHH3he6+BhgNdC23z5nAUHf/AsDdP4v+XODu70TPPwI+A5plGKtILHr1CksYa0yJyE9lmkjOTLsbIPriP7OSY7YDFqW9XhxtS9caaG1m081shpl1Kn8SM+sANADeTdt8bdTkdbOZbVLRh5vZWWZWbGbFS5curSRUkcpttRV07hz6SUpLk45GJHdkmkjqmpmVvYiarRrUwOfXA1oBHYE+wL/L3fk0Bx4CTnP3stmOLgZ2A9oDTYBBFZ3Y3Ye5e5G7FzVrppsZqRmpFHz6aajgEpEg00TyFDDGzI40syOBUdG2DVkC7JD2evtoW7rFwER3X+vu7wELCIkFM2sMTAIudfcZZQe4+8cerAbuIzShiWTFb38LTZqoeUskXaaJZBAwDfhD9PgPcFElx8wEWpnZTmbWAOgNTCy3zwTC3Qhm1pTQ1LUw2n888GD5TvXoLoXoDqkb8GaG1yBSbQ0aQJ8+YZT7l18mHY1Ibsh0QOI6d7/D3XtGj7vc/ftKjikFzgWmAPOBse4+18yGmFmXaLcpwHIzm0dIVBe6+3LgZOBQ4NQKynxHmNkbwBtAU+CajbxmkWpJpeC778L8WyIC5hkUxUdlutcDbYCGZdvd/dfxhVZzioqKvLi4OOkwpEC4w267hfVKNL28FDIzm7W+IR7pMm3aug+4AygFDgceBIZXPTyR/GUW7kqefx7efz/paESSl2kiaeTu/yHcwXzg7lcCx8cXlkhuO+WU8Odw/ZwSyTiRrI6mkH/HzM41s+5sYGoUkULXogV07BiqtzRlitR2mSaS8wjzbA0E9gNOAQbEFZRIPkil4J134OWXk45EJFmVJpJo8GEvd//G3Re7+2nu3iN9bIdIbdSjR1hBUWNKpLarNJFEZb4HZyEWkbzSuDF07w6jR8Pq1UlHI5KcTJu2XjWziWbW38xOLHvEGplIHujfH774AiZNSjoSkeRkmkgaAsuBI4ATokfnuIISyRdHHRXGk6h5S2qzjNYjcffT4g5EJB/Vqwf9+sFtt8GyZdC0adIRiWRfpisk3mdm95Z/xB2cSD5IpWDtWhgzJulIRJKRadPWE4SZeCcRJmxsDHwTV1Ai+WSvvWDvvdW8JbVXpk1bj6S/NrNRwP9iiUgkD6VS8Ne/wltvhXm4RGqTTO9IymsFbF3pXiK1RN++UKcOPPRQ0pGIZF+mfSRfm9lXZQ/gcdazMqFIbbTNNnDMMSGRrFtX+f4ihSTT9Ug2d/fGaY/W5Zu7RGq7VAoWLdLU8lL7ZHpH0t3Mfpn2egsz6xZfWCL5p2tX2HxzNW9J7ZNpH8kV7v7DwqLuvgK4Ip6QRPLTL34BJ50EDz8M336bdDQi2ZNpIqlov4wqvkRqk1QKvvkmrOkuUltkmkiKzexfZrZz9PgXMCvOwETy0SGHhLVKNKZEapNME8mfgDXAGGA08B3wx7iCEslXdeqEiRynToWPPko6GpHsyLRqa6W7D3b3Indv7+6XuPvKyo4zs05m9raZlZjZ4PXsc7KZzTOzuWY2Mm37ADN7J3oMSNu+n5m9EZ3zNjOzTK5BJFv69w8lwCNHVr6vSCHItGprqpltkfZ6SzObUskxdYGhwHFA
G6CPmbUpt08r4GLgIHffAzg/2t6E0Jm/P9ABuMLMtowOuwM4kzAoshXQKZNrEMmW1q1h//3VvCW1R6ZNW02jSi0A3P0LKh/Z3gEocfeF7r6G0CTWtdw+ZwJDo/Ph7p9F248Fprr759F7U4FOZtYcaOzuM9zdgQcBlSFLzkml4I034LXXko5EJH6ZJpJ1ZrZj2Qszawl4JcdsByxKe7042pauNdDazKab2Qwz61TJsdtFzzd0TpHE9eoF9evrrkRqh0wTyaXA/8zsITMbDjxHaJKqrnqE5qmOQB/g3+lNaNVhZmeZWbGZFS9durQmTimSsa22gs6dYcQIKC1NOhqReGXa2f4UUAS8DYwC/gqsquSwJcAOaa+3j7alWwxMdPe17v4esICQWNZ37JLo+YbOWRbzsKg4oKhZs2aVhCpS81Ip+PTTUMElUsgy7Ww/g7AOyV+BC4CHgCsrOWwm0MrMdjKzBkBvYGK5fSYQ7kYws6aEpq6FwBTgmKhTf0vgGGCKu38MfGVmB0TVWingsUyuQSTbfvtbaNJEzVtS+DJt2joPaA984O6HA/sAKzZ0gLuXAucSksJ8YKy7zzWzIWbWJdptCrDczOYB04AL3X25u38OXE1IRjOBIdE2gHOAu4ES4F3gyQyvQSSrGjSA3r3DKPcvv6x8f5F8ZaH4qZKdzGa6e3szmwPs7+6rzWxuVLKb84qKiry4uDjpMKQWevllOOAAuPtuOP30pKMR2ThmNsvdiyrbL9M7ksVRJ/gEYKqZPQZ8UJ0ARWqDDh3CuBLNCCyFLNPO9u7uvsLdrwQuA+5B4zdEKmUWOt2few7efz/paETisdFL7br7c+4+MRpkKCKVOOWU8Ofw4cnGIRKXqq7ZLiIZatECOnYM1VsZdEmK5B0lEpEsSKXgnXdC57tIoVEiEcmCHj2gYUONKZHCpEQikgWNG0P37jB6NKxenXQ0IjVLiUQkS1Ip+OILmDw56UhEapYSiUiWHHUUbLONmrek8CiRiGRJvXrQrx9MmgTLliUdjUjNUSIRyaJUCtauhTFjko5EpOYokYhk0V57wd57q3lLCosSiUiW9e8Pr7wCb72VdCRSqNzhySeha1dYVdnKUTVAiUQky/r2hTp1NJGj1Lx16+DRR6GoKKyHM2cOlJTE/7lKJCJZ1rw5HHNMmHtr3bqko5FCUFoalnVu2zYMfv36a7j33pBE2raN//OVSEQSkErBhx/C888nHYnkszVrwlo3u+0WJgetWxdGjYL58+G006B+/ezEoUQikoCuXWHzzdXpLlWzahXcfjvsvDOceWZY0vmxx0JTVu/eIaFkkxKJSAJ+8Qs46aQwZcrrrycdjeSLr7+Gv/8dWraEgQPh17+GKVPCZKBduoS+tyQokYgkZMiQ8Euyc2f4+OOko5Fc9vnncOWVYUmCQYNgn31Cs+hzz4X+NrNk41MiEUnIdtvBE0+EL4kuXWDlyqQjklzz6achcbRoAVddBYcdBjNnwlNPwSGHJB3dj2JNJGbWyczeNrMSMxtcwfunmtlSM5sTPc6Ith+etm2OmX1nZt2i9+43s/fS3msX5zWIxKldu9C8NXt2GF+iKi4BWLQoNF21bAk33QQnnABvvAHjx4fS3lwTWyIxs7rAUOA4oA3Qx8zaVLDrGHdvFz3uBnD3aWXbgCOAb4Gn0465MO2YOXFdg0g2dO4MN98cviQGDUo6GknSu++GzvOdd4Y77ghzs731FowcCXvumXR061cvxnN3AErcfSGAmY0GugLzNvI8PYEn3f3bGo5PJGcMHBhWULzpJmjVCs46K+mIJJvmzoXrrw+lu/Xrw+9/DxdeCDvumHRkmYmzaWs7YFHa68XRtvJ6mNnrZjbOzHao4P3ewKhy266NjrnZzDapoXhFEnXzzWE08jnnwNNPV76/5L/Zs8MAwj33hAkT4K9/hfffD6W9+ZJEIPnO9seBlu6+FzAVeCD9TTNrDrQFpqRtvhjYDWgPNAEqbAwws7PM
rNjMipcuXRpH7CI1ql690F+yxx6hNPjNN5OOSOIyfTocdxzstx/8979w+eXwwQehtHebbZKObuPFmUiWAOl3GNtH237g7svdvWzh0buB/cqd42RgvLuvTTvmYw9WA/cRmtB+xt2HuXuRuxc1a9asmpcikh2bbx4quTbdNPSdfPpp0hFJTXGHZ56Bjh3h4INh1qzQnPXBB6Eia6utko6w6uJMJDOBVma2k5k1IDRRTUzfIbrjKNMFmF/uHH0o16xVdoyZGdAN0O82KSg77ACPPw5Ll4ay4G/VO5jX3MO/5wEHwNFHh/mvbrklNGENHgyNGycdYfXFlkjcvRQ4l9AsNR8Y6+5zzWyImXWJdhtoZnPN7DVgIHBq2fFm1pJwR/NcuVOPMLM3gDeApsA1cV2DSFL22y9U6sycGeblUllw/vn++7CAWbt24QfB0qVw112hMuu888LsBoXC3D3pGGJXVFTkxcXFSYchstH+9a/QATtoENxwQ9LRSCbWrg0z8V5/PSxYECZUvOQS6NMn9IPlEzOb5e6VjlzJs8sSqV3+/OdQFnzjjbDLLnDGGUlHJOvz3Xdw333h3+qDD8KdyMMPw4knJjcHVrYokYjkMLNQCvree/CHP8BOO8GRRyYdlaRbuTI0Wd10U5gz7Te/gaFDQyl30nNgZUuB50mR/FevHowdG5pIevSAeRs7pFdisWIFXHttmAfrr3+F3XcPpbzTp8Pxx9eeJAJKJCJ5oXHjUBbcsGH4kvrss6Qjqr2WLYO//S0kkL/9LVRjvfgi/Oc/cPjhtSuBlFEiEckTLVqEMtJPPw0LY61alXREtctHH8Ff/hL+Ha67Do49Fl59NST43/wm6eiSpUQikkfatw9rvb/8Mpx6qsqCs+H993/sn7rtNujZM8yNNXZs6FAXJRKRvHPiiaEyaOxYuOyypKMpXG+/HZL1LrvAvfeGNdAXLIAHHgj9IfIjVW2J5KELLghlwdddF2YLPvXUpCMqHK+/HjrRH3449En96U/h73u7iqacFUCJRCQvmYUS0/ffD1POt2wZ5nCSqlu3LkyaeOmlYa6zwYPh/PNh662Tjiz3KZGI5Kn69cOv5gMPDM1dL70Eu+6adFT5acUKGDAAJk6EXr3ColJbbpnZkMe4AAAOD0lEQVR0VPlDfSQieeyXv4RJk0JSOf74UJoqG+f118PytZMnh8kUR41SEtlYSiQiea5lS3jsMViyBLp1C1N1SGYefDCMA1m1Cp59NkymWBvHgVSXEolIATjggPClOH06nH56mLpc1m/16lDSO2AA7L9/WKnwoIOSjip/KZGIFIiTTgpVXCNHwpVXJh1N7vrwQzjkELjzTrjoIpg6FX71q6Sjym/qbBcpIIMHh4WThgwJ4x/69086otzy9NPQty+sWQOPPgrduycdUWHQHYlIATELFUeHHx6auJ5/PumIcsO6dXD11dCpEzRvDsXFSiI1SYlEpMA0aACPPAI77xy+LN95J+mIkvX553DCCXD55eFuZMYMaN066agKixKJSAHacstQFlynTlgXY/nypCNKxuzZYdniqVPh//4PHnooDDaUmqVEIlKgfv3rUBa8aFG4M1m9OumIsuuee8JgzdJSeOGFUKWl0t54KJGIFLADD4T77w9fpGecUTvKgr/7LlzrGWeE6qzZs0OJr8Qn1kRiZp3M7G0zKzGzwRW8f6qZLTWzOdHjjLT3vk/bPjFt+05m9nJ0zjFm1iDOaxDJd717h47m4cPDn4XsvffCeJB77glzZj31FDRrlnRUhS+28l8zqwsMBY4GFgMzzWyiu5dfKHSMu59bwSlWuXtFs/3fCNzs7qPN7E7gdOCOmoxdpNBcemkoC77iilAW3Ldv0hHVvMmT4ZRTwl3X449D585JR1R7xHlH0gEocfeF7r4GGA10rc4JzcyAI4Bx0aYHgG7VilKkFjCDYcPgsMPCuhr/+1/SEdWc778PFVnHHx9WL5w1S0kk2+JMJNsBi9JeL462
ldfDzF43s3FmtkPa9oZmVmxmM8ysLFlsBaxw99JKziki5TRoEAbhtWwZ5uQqKUk6oupbtixUpV19dUiQL74Yigwku5LubH8caOnuewFTCXcYZVq4exHQF7jFzHbemBOb2VlRIipeunRpzUUskseaNAllwRB+wX/+ebLxVMcrr8C++4bJFocNC/0ijRolHVXtFGciWQKk32FsH237gbsvd/eyosS7gf3S3lsS/bkQeBbYB1gObGFmZX07Pztn2vHD3L3I3YuaqbdN5Ae77AITJoRFsXr0CNOF5BP3ME/WIYeEcTLTp8OZZ6q0N0lxJpKZQKuoyqoB0BuYmL6DmTVPe9kFmB9t39LMNomeNwUOAua5uwPTgJ7RMQOAx2K8BpGCdPDBYR3yZ5+F3/8+f8qCv/02LCv8hz/AEUeE/pCioqSjktiqtty91MzOBaYAdYF73X2umQ0Bit19IjDQzLoApcDnwKnR4bsDd5nZOkKyuyGt2msQMNrMrgFeBe6J6xpEClm/fqGf5Morw7rvl1ySdEQbVlIS7qDeeCPEfNll4Y5EkmeeLz9FqqGoqMiLi4uTDkMk57hDKhXGmIweHZaZzUWPPRbirFcPRowIky9K/MxsVtRXvUHK5yK1mBncfXdo6howIKz7nktKS+Hii0OVWatWoSlLSST3KJGI1HKbbALjx8P220PXrrBwYdIRBZ99BsceCzfcAGedFca+tGyZdFRSESUSEaFp0zAyvLQ0lAWvWJFsPC+9FEp7X3wR7rsP7roLGjZMNiZZPyUSEQHCGh3jx8O770LPnrB2bfZjcIfbb4dDDw13Si+9FKq0JLcpkYjIDw47LPSZ/Oc/ocQ2m7U4K1eGSrKBA+G440J/SLuKZtuTnKM120XkJ1KpsKriNdeEDu5Bg+L/zLffDqW98+fDtdeGtedV2ps/lEhE5GeGDAnjNgYPDkv29uxZ+TFV9cgjYZ6sTTaBKVPgqKPi+yyJh3K+iPyMWejkPvBA6N8fXn655j+jtBQuuCAkqTZtwgJUSiL5SYlERCrUsGGYk2vbbaFLlzA3V0355BM48kj45z/hj3+E55+HHXao/DjJTUokIrJezZqF2YLXrAllwV9+Wf1zvvAC7LMPzJwZRtT/v/8XpriX/KVEIiIbtNtuoR9jwQI46aSqlwW7w803w+GHw+abh+ayfv1qNlZJhhKJiFTqiCPCmh9Tp8K55258WfDXX4d5vP7yl9BMNnMmtG0bT6ySfaraEpGMnHZaKAu+/vpQFnzBBZkdN29eKO1dsAD+/vdwnNYOKSxKJCKSsWuuCWXBF10UyoK7d9/w/mPGwOmnw6abhkGOHTtmJUzJMjVtiUjG6tSBBx6A/fcP/RvrW51hzRo4/3zo3TuMTn/1VSWRQqZEIiIbpVGjsD7Ir34FJ5wAH3740/eXLAkd6rfeGpLJtGmhhFgKlxKJiGy0rbcOZcGrVkHnzvDVV2H7tGlh1t7XXgvNWjffDPXrJxurxE+JRESqpE0bGDcudKb36gU33hhGpjdpEqqyTj456QglW5RIRKTKjjoK7rgDnnoqzMvVsye88grsvnvSkUk2qWpLRKrlzDPDIMV69cJzlfbWPrHekZhZJzN728xKzGxwBe+famZLzWxO9Dgj2t7OzF4ys7lm9rqZ9Uo75n4zey/tGK1YIJKwc84Jy+EqidROsd2RmFldYChwNLAYmGlmE919Xrldx7j7ueW2fQuk3P0dM9sWmGVmU9y9bAHQC919XFyxi4hI5uK8I+kAlLj7QndfA4wGumZyoLsvcPd3oucfAZ8BzWKLVEREqizORLIdsCjt9eJoW3k9ouarcWb2s4mkzawD0AB4N23ztdExN5vZJjUatYiIbJSkq7YeB1q6+17AVOCB9DfNrDnwEHCau6+LNl8M7Aa0B5oAFS4EamZnmVmxmRUvXbo0rvhFRGq9OBPJEiD9DmP7aNsP3H25u6+OXt4N7Ff2npk1BiYBl7r7jLRjPvZgNXAf
oQntZ9x9mLsXuXtRs2ZqFRMRiUuciWQm0MrMdjKzBkBvYGL6DtEdR5kuwPxoewNgPPBg+U71smPMzIBuwJuxXYGIiFQqtqotdy81s3OBKUBd4F53n2tmQ4Bid58IDDSzLkAp8DlwanT4ycChwFZmVrbtVHefA4wws2aAAXOAs+O6BhERqZz5xq5Qk4eKioq8eH3TlIqISIXMbJa7F1W6X21IJGa2FPigioc3BZbVYDhJKpRrKZTrAF1LriqUa6nudbRw90o7mWtFIqkOMyvOJCPng0K5lkK5DtC15KpCuZZsXUfS5b8iIpLnlEhERKRalEgqNyzpAGpQoVxLoVwH6FpyVaFcS1auQ30kIiJSLbojERGRalEiyYCZXR1NEjnHzJ6OprbPS2b2DzN7K7qe8Wa2RdIxVYWZnRStV7POzPKyuqay9XryhZnda2afmVlezzJhZjuY2TQzmxf9t3Ve0jFVlZk1NLNXzOy16FquivXz1LRVOTNr7O5fRc8HAm3cPS9H1JvZMcB/o5kHbgRw9wonvsxlZrY7sA64C7jA3fNqxGm0Xs8C0tbrAfpUsF5PzjOzQ4FvCFMa7Zl0PFUVTb/U3N1nm9nmwCygW57+mxiwqbt/Y2b1gf8B56XPW1iTdEeSgbIkEtkUyNvs6+5Pu3tp9HIGYTLNvOPu89397aTjqIYqr9eTa9z9ecIUR3ktmhB2dvT8a8LcfxUtfZHzooltv4le1o8esX1vKZFkyMyuNbNFQD/g8qTjqSG/A55MOohaKtP1eiQBZtYS2Ad4OdlIqs7M6prZHMLCgFPdPbZrUSKJmNkzZvZmBY+uAO5+qbvvAIwAyi8NnFMqu5Zon0sJk2WOSC7SDcvkOkRqmpltBjwCnF+uNSKvuPv37t6O0OrQwcxia3aMbfbffOPuR2W46whgMnBFjOFUS2XXEs2o3Bk40nO4k2wj/k3yUaXr9Uj2Rf0JjwAj3P3RpOOpCe6+wsymAZ2IadkN3ZFkwMxapb3sCryVVCzVZWadgIuALu7+bdLx1GKVrtcj2RV1UN8DzHf3fyUdT3WYWbOyikwza0Qo6ojte0tVWxkws0eAXQlVQh8AZ7t7Xv56NLMSYBNgebRpRj5WoJlZd+B2oBmwApjj7scmG9XGMbPfArfw43o91yYcUpWY2SigI2Gm2U+BK9z9nkSDqgIzOxh4AXiD8P86wCXuPjm5qKrGzPYiLF1el3DDMNbdh8T2eUokIiJSHWraEhGRalEiERGRalEiERGRalEiERGRalEiERGRalEiEakBZvZN5Xtt8PhxZvbr6PlmZnaXmb1rZrPM7Fkz29/MGpjZ82amgcSSU5RIRBJmZnsAdd19YbTpbsIkiK3cfT/gNKBpNLnjf4BeyUQqUjElEpEaZME/ojnB3jCzXtH2Omb2f9FaMFPNbLKZ9YwO6wc8Fu23M7A/8Dd3Xwfg7u+5+6Ro3wnR/iI5Q7fIIjXrRKAdsDdhpPdMM3seOAhoCbQBtiZMUX5vdMxBwKjo+R6EUfrfr+f8bwLtY4lcpIp0RyJSsw4GRkUzr34KPEf44j8YeNjd17n7J8C0tGOaA0szOXmUYNZECy+J5AQlEpHkrQIaRs/nAntHKyiuzybAd7FHJZIhJRKRmvUC0CtaVKgZcCjwCjAd6BH1lfyKMMlhmfnALgDu/i5QDFwVzUaLmbU0s+Oj51sBy9x9bbYuSKQySiQiNWs88DrwGvBf4KKoKesRwiqI84DhwGzgy+iYSfw0sZwB/AooMbM3gfsJq9wBHB7tL5IzNPuvSJaY2Wbu/k10V/EKcJC7fxKtFzEter2+TvayczwKDHb3BVkIWSQjqtoSyZ4nosWGGgBXR3cquPsqM7uCsGb7h+s7OFoAa4KSiOQa3ZGIiEi1qI9ERESqRYlERESqRYlERESqRYlERESqRYlERESqRYlERESq5f8DjuG37nxb5t8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1158f55f8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "x_axis = np.log10(C_s)\n",
    "plt.plot(x_axis, np.array(accuracy_s), 'b-')\n",
    "    \n",
    "plt.legend()\n",
    "plt.xlabel( 'log(C)' )                                                                                                      \n",
    "plt.ylabel( 'accuracy' )\n",
    "plt.savefig('SVM_Otto.png' )\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.6737919157126937, 0.64167764157633467, 0.69891601661432479, 0.58028568534089753, 0.52942964238678958, 0.56316482625873776, 0.58413534596292171]\n"
     ]
    }
   ],
   "source": [
    "accuracy1 = accuracy_s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in log10\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "tmp = np.log10(-1)\n",
    "SVC2 = LinearSVC(C=tmp)\n",
    "SVC2 = SVC2.fit(X_train, y_train)\n",
    "linear_svm_predict = SVC2.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 ..., 0 0 0]\n"
     ]
    }
   ],
   "source": [
    "print(linear_svm_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "predict_df = pd.DataFrame({'listing_id': predict_id, 'interest_level': linear_svm_predict})\n",
    "predict_df.to_csv('./predict/LinearSVC_predict.csv', columns=['listing_id', 'interest_level'], index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SVM with RBF\n",
    "def fit_grid_point_RBF(C, gamma, X_train, y_train, X_val, y_val):\n",
    "    SVC3 =  SVC( C = C, kernel='rbf', gamma = gamma, cache_size=2000)\n",
    "    SVC3 = SVC3.fit(X_train, y_train)\n",
    "    \n",
    "    accuracy = SVC3.score(X_val, y_val)\n",
    "    \n",
    "    print(\"accuracy: {}\".format(accuracy))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy: 0.6987134028973762\n",
      "accuracy: 0.6987134028973762\n",
      "accuracy: 0.6987134028973762\n",
      "accuracy: 0.6987134028973762\n"
     ]
    }
   ],
   "source": [
    "C_s = np.logspace(-1, 0, 2)\n",
    "gamma_s = np.logspace(-1, 0, 2)  \n",
    "\n",
    "accuracy_s = []\n",
    "for i, oneC in enumerate(C_s):\n",
    "    for j, gamma in enumerate(gamma_s):\n",
    "        tmp = fit_grid_point_RBF(oneC, gamma, X_train, y_train, X_test, y_test)\n",
    "        accuracy_s.append(tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#conclusion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
